# from urllib import request
import re
import time
from urllib.parse import urljoin

import requests
# import re
from lxml import etree

# Fetch the college index page. requests never times out by default, so an
# explicit timeout keeps a stalled connection from hanging the script.
home_res = requests.get("https://tieba.baidu.com/t/f/?class=college", timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page and silently
# finding no schools.
home_res.raise_for_status()

# items = re.findall(r'<a class="each_topic_entrance_item" href="//tieba.baidu.com/t/f/(\d+)" data-fid=".*?"> (\w+)</a>', home_res.text)
# for item in items:
# 	school_url = f"https://tieba.baidu.com/t/f/{item[0]}"
# 	school_name = item[1]
# 	print(f"正在爬取学校 {school_name}")


# Seconds to wait for a school page before giving up, so that one stalled
# connection cannot hang the whole crawl.
DETAIL_TIMEOUT = 10


def _first(values, default=""):
    """Return the first item of an XPath result list, or *default* if it is empty."""
    return values[0] if values else default


def _print_school_threads(school_url):
    """Fetch one school page and print its modules with their thread links.

    For every ``div.module_item`` on the page, prints the module name and
    then one line per ``a.thread_title`` anchor: the link text followed by
    its absolute URL. A failed request is reported and skipped rather than
    aborting the crawl.
    """
    try:
        detail_res = requests.get(school_url, timeout=DETAIL_TIMEOUT)
        detail_res.raise_for_status()
    except requests.RequestException as exc:
        print(f"skipping {school_url}: {exc}")
        return

    detail_tree = etree.HTML(detail_res.text)
    for detail_item in detail_tree.xpath('//div[@class="module_item"]'):
        print(_first(detail_item.xpath('./p[@class="module_name"]/text()')))
        for link in detail_item.xpath('.//a[@class="thread_title"]'):
            href = _first(link.xpath('./@href'))
            if not href:
                # An anchor without a target has nothing useful to print.
                continue
            # Resolve against the page URL so protocol-relative, absolute
            # and path-relative hrefs all produce a valid link.
            print(_first(link.xpath('./text()')), urljoin(detail_res.url, href))


tree = etree.HTML(home_res.text)
items = tree.xpath('//a[@class="each_topic_entrance_item"]')
for item in items:
    href = _first(item.xpath('./@href'))
    if not href:
        # No target URL: there is no school page to visit for this entry.
        continue
    school_name = _first(item.xpath('./text()'))
    school_url = urljoin(home_res.url, href)
    print(school_url, school_name)

    _print_school_threads(school_url)

    # Pause between school pages to avoid hammering the server.
    time.sleep(1)
